# -*- coding: utf-8 -*-
import os.path
from time import sleep
import urllib


# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
class ChinaDailyPipeline(object):
    """Persist each crawled ChinaDaily news item to disk.

    For every item, writes ``<news_id>.txt`` containing the headline,
    category, link and outline, then downloads the article image next to
    it under the same ``news_id``. The target directory is taken from the
    ``CHINADAILY_DATA_PATH`` spider setting and created on demand.
    """

    def process_item(self, item, spider):
        """Store the item's text content and image; return the item unchanged.

        :param item: mapping with keys ``news_id``, ``headline``,
            ``category``, ``link``, ``outline``, ``image_url``.
        :param spider: the running spider; only ``spider.settings`` is used.
        :returns: the same ``item``, so later pipelines can process it.
        """
        # Resolve the storage directory. makedirs (vs. mkdir) also creates
        # any missing parent directories.
        save_path = spider.settings.get('CHINADAILY_DATA_PATH')
        abs_save_path = os.path.abspath(save_path)
        if not os.path.isdir(abs_save_path):
            os.makedirs(abs_save_path, 0o755)

        # Write the textual content. Encoding explicitly to UTF-8 replaces
        # the old reload(sys)/sys.setdefaultencoding hack, which mutates
        # interpreter-wide state and can break unrelated libraries.
        news_id = item['news_id']
        content_file_path = os.path.join(abs_save_path, news_id + '.txt')
        content = ("Headline:\n\t" + item['headline'] +
                   "\nCategory:\n\t" + item['category'] +
                   "\nNews Link:\n\t" + item['link'] +
                   "\nOutline:\n\t" + item['outline'])
        with open(content_file_path, "wb") as f_news:
            f_news.write(content.encode('utf-8'))

        # Download the article image, keeping the original file extension.
        # urlretrieve moved to urllib.request in Python 3; fall back to the
        # Python 2 location so both interpreters work.
        try:
            from urllib.request import urlretrieve
        except ImportError:  # Python 2
            from urllib import urlretrieve
        image_suffix = os.path.splitext(item['image_url'])[1]
        image_file_path = os.path.join(abs_save_path, news_id + image_suffix)
        urlretrieve(item['image_url'], image_file_path)

        return item
